This documnet is a practice analysis on Ziesel dataset
# Import Ziesel dataset
dat <- read.csv("Zeisel_preprocessed.csv", row.names = 1)
cell_type <- read.table("Zeisel_cell_info.txt", sep = "\t", header = 1)
# Get the labels for each cell
cluster_labels <- as.numeric(as.factor(cell_type$level1class))
rand_ind <- sample(nrow(dat), 300)
sub_dat <- dat[rand_ind, ]
sub_celltype <- cell_type[rand_ind, ]
sub_cluster_labels <- as.numeric(as.factor(sub_celltype$level1class))
cor_pearson_mat <- stats::cor(sub_dat, method = "pearson")
cor_pearson_mat[upper.tri(cor_pearson_mat, diag = T)] <- NA
cor_pearson_mat[1:5,1:5]
## Vip Sst Npy Reln Cnr1
## Vip NA NA NA NA NA
## Sst -0.2176471 NA NA NA NA
## Npy 0.5250629 0.2201366 NA NA NA
## Reln 0.4961528 0.0895432 0.9590302 NA NA
## Cnr1 0.8044119 -0.1738645 0.5893936 0.6388004 NA
# plot the smallest correlations
cor_pearson_vec <- sort(abs(cor_pearson_mat), decreasing = T)
plot(cor_pearson_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_pearson_mat) == cor_pearson_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_pearson_mat) == rev(cor_pearson_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}
cor_spearman_mat <- stats::cor(sub_dat, method = "spearman")
cor_spearman_mat[upper.tri(cor_spearman_mat, diag = T)] <- NA
cor_spearman_mat[1:5,1:5]
## Vip Sst Npy Reln Cnr1
## Vip NA NA NA NA NA
## Sst -0.09100946 NA NA NA NA
## Npy 0.28743030 0.3198249 NA NA NA
## Reln -0.01608196 0.1414216 0.6553282 NA NA
## Cnr1 0.24113246 -0.1200782 0.1168631 0.3423456 NA
# plot the smallest correlations
cor_spearman_vec <- sort(abs(cor_spearman_mat), decreasing = T)
plot(cor_spearman_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_spearman_mat) == cor_spearman_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_spearman_mat) == rev(cor_spearman_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}
cor_kendall_mat <- stats::cor(sub_dat, method = "kendall")
cor_kendall_mat[upper.tri(cor_kendall_mat, diag = T)] <- NA
cor_kendall_mat[1:5,1:5]
## Vip Sst Npy Reln Cnr1
## Vip NA NA NA NA NA
## Sst 0.08704571 NA NA NA NA
## Npy 0.30934225 0.40294314 NA NA NA
## Reln 0.08860647 0.21814939 0.5280268 NA NA
## Cnr1 0.23799331 0.08026756 0.1843032 0.2985507 NA
# plot the smallest correlations
cor_kendall_vec <- sort(abs(cor_kendall_mat), decreasing = T)
plot(cor_kendall_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_kendall_mat) == cor_kendall_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_kendall_mat) == rev(cor_kendall_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}
library(energy)
dist_cor_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))
for (i in 2:ncol(sub_dat)){
for (j in 1:(i-1)){
dist_cor_mat[i,j] <- dcor(as.numeric(sub_dat[, i]), as.numeric(sub_dat[, j]))
}
}
dist_cor_mat[upper.tri(dist_cor_mat, diag = T)] <- NA
dist_cor_mat[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] NA NA NA NA NA
## [2,] 0.2584206 NA NA NA NA
## [3,] 0.6414468 0.5080579 NA NA NA
## [4,] 0.6261122 0.3853323 0.9360060 NA NA
## [5,] 0.7433247 0.2485128 0.6455535 0.7043314 NA
# plot the smallest correlations
dist_cor_vec <- sort(abs(dist_cor_mat), decreasing = T)
plot(dist_cor_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(dist_cor_mat) == dist_cor_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(dat[,idx1], dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(dist_cor_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(dist_cor_mat) == rev(dist_cor_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(dat[,idx1], dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(dist_cor_mat[idx1, idx2], 3)))
}
library(Hmisc)
hoeffd_cor_mat <- hoeffd(x = as.matrix(sub_dat))
hoeff_dist <- hoeffd_cor_mat$D
hoeff_dist[upper.tri(hoeff_dist, diag = T)] <- NA
# plot the smallest correlations
cor_hoeff_vec <- sort(abs(hoeff_dist), decreasing = T)
plot(cor_hoeff_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(hoeff_dist) == (cor_hoeff_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(hoeff_dist[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(hoeff_dist) == rev(cor_hoeff_vec)[i], arr.ind = T)
idx1 <- idx[1,1]; idx2 <- idx[1,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(hoeff_dist[idx1, idx2], 3)))
}
library(entropy)
mi_cor_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))
for (i in 1:ncol(sub_dat)){
for (j in 1:ncol(sub_dat)){
y2d <- discretize2d(as.matrix(sub_dat[, i]),
as.matrix(sub_dat[, j]),
numBins1 = 20,
numBins2 = 20)
mi_cor_mat[i,j] <- as.numeric(mi.empirical(y2d))
}
}
# plot the smallest correlations
cor_mi_vec <- sort(abs(mi_cor_mat), decreasing = T)
plot(cor_mi_vec)
#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(mi_cor_mat) == (cor_mi_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[1,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(mi_cor_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(mi_cor_mat) == rev(cor_mi_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[1,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(mi_cor_mat[idx1, idx2], 3)))
}
library(minerva)
cor_MIC <- mine(sub_dat)
cor_MIC_mat <- cor_MIC$MIC
cor_MIC_mat[upper.tri(cor_MIC_mat, diag = T)] <- NA
cor_MIC_vec <- sort(abs(cor_MIC_mat), decreasing = T)
plot(cor_MIC_vec)
#plot the high correlations
par(mfrow = c(2,2))
idx <- which(abs(cor_MIC_mat) == (cor_MIC_vec)[i], arr.ind = T)
for(i in 1:4){
idx1 <- idx[i, 1]; idx2 <- idx[i,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}
#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_MIC_mat) == rev(cor_MIC_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}
# low pearson and high spearman
cor_contrast1 <- (abs(cor_pearson_mat) < 0.3) & (abs(cor_spearman_mat) > 0.7)
cor_contrast_ind1 <- which(cor_contrast1, arr.ind = T)
# high pearson and low spearman
cor_contrast2 <- (abs(cor_pearson_mat) > 0.8) & (abs(cor_spearman_mat) < 0.2)
cor_contrast_ind2 <- which(cor_contrast2, arr.ind = T)
nrow(cor_contrast_ind2)
## [1] 62
# low pearson and high kendall
cor_contrast3 <- (abs(cor_pearson_mat) < 0.2) & (abs(cor_kendall_mat) > 0.8)
cor_contrast_ind3 <- which(cor_contrast3, arr.ind = T)
nrow(cor_contrast_ind3)
## [1] 0
# high pearson and low kendall
cor_contrast4 <- (abs(cor_pearson_mat) > 0.8) & (abs(cor_kendall_mat) < 0.2)
cor_contrast_ind4 <- which(cor_contrast4, arr.ind = T)
nrow(cor_contrast_ind4)
## [1] 674
# low pearson and high distance correlation
cor_contrast5 <- (abs(cor_pearson_mat) < 0.2) & (dist_cor_mat > 0.6)
cor_contrast_ind5 <- which(cor_contrast5, arr.ind = T)
# high pearson and low distance correlation
cor_contrast6 <- (abs(cor_pearson_mat) > 0.6) & (dist_cor_mat < 0.4)
cor_contrast_ind6 <- which(cor_contrast6, arr.ind = T)
nrow(cor_contrast_ind6)
## [1] 253
# low pearson and high MIC
cor_contrast7 <- (abs(cor_pearson_mat) < 0.2) & (abs(cor_MIC_mat) > 0.75)
cor_contrast_ind7 <- which(cor_contrast7, arr.ind = T)
nrow(cor_contrast_ind7)
## [1] 5
# high pearson and low MIC
cor_contrast8 <- (abs(cor_pearson_mat) > 0.8) & (abs(cor_MIC_mat) < 0.25)
cor_contrast_ind8 <- which(cor_contrast8, arr.ind = T)
nrow(cor_contrast_ind8)
## [1] 3
par(mfrow = c(2, 3))
for (i in 1:nrow(cor_contrast_ind1)){
index1 <- cor_contrast_ind1[i, 1]; index2 <- cor_contrast_ind1[i, 2]
plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[index2], ", (", idx2, ")"),
main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
"\n",
paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3))))
}
par(mfrow = c(2, 3))
for (i in 1:nrow(cor_contrast_ind5)){
index1 <- cor_contrast_ind5[i, 1]; index2 <- cor_contrast_ind5[i, 2]
plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[index2], ", (", idx2, ")"),
main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
"\n",
paste0("Dist.Cor of ", round(dist_cor_mat[index1, index2], 3))))
}
par(mfrow = c(2, 4))
for (i in 1:nrow(cor_contrast_ind7)){
index1 <- cor_contrast_ind7[i, 1]; index2 <- cor_contrast_ind7[i, 2]
plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[index2], ", (", idx2, ")"),
main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
"\n",
paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}
par(mfrow = c(1,3))
for (i in 1:nrow(cor_contrast_ind8)){
index1 <- cor_contrast_ind8[i, 1]; index2 <- cor_contrast_ind8[i, 2]
plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[index2], ", (", idx2, ")"),
main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
"\n",
paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}